
set more off
set matsize 11000


cd \Data

********************************************************************************
** This do file provides the summary statistics displayed Tables 1 and A3. In
**addition, the code produces the data summarized in Figures 1 and A5.
********************************************************************************


********************************************************************************
** Table 1: Summary Statistics by Year-Built

use cons_data.dta, clear
sort premise_id
merge m:1 premise_id using premise_characteristics.dta
keep if _merge == 3
drop _merge

keep if (zoning == "R-1" | zoning == "RD 1" | zoning == "RD 2" | zoning == "RD 3" | zoning == "RD 4" | zoning == "RD 5" | zoning == "RD-1" | zoning == "RD-2" | zoning == "RD-3" | zoning == "RD-4" | zoning == "RD-5" | zoning == "RD1" | zoning == "RD2" | zoning == "RD3" | zoning == "RD4" | zoning == "RD5" | zoning == "RD5PD")
drop if bedrooms_num == 0 | bedrooms_num == .
drop if Resdnc_sqft == 0 | Resdnc_sqft == .
keep if electric_heat == 0
keep if year_built > 1974 & year_built < 1983
sort year_built

** Summary stats by year built (in Table 1)
by year_built: sum kwh_daily Resdnc_sqft bedrooms_num stories central

drop if year_built == 1978 | year_built == 1979
gen post = 0
replace post = 1 if year_built >= 1980

** Pre-vs-Post differences displayed in the last column of Table 1
ttest Resdnc_sqft, by(post)
ttest bedrooms_num, by(post)
ttest stories, by(post)
ttest central, by(post)


********************************************************************************
** Table A3: Correlation between avg. temp and hour-specific temps

use cons_data.dta, clear
egen temp_max= rowmax(temp_1-temp_24)
sum temp_avg temp_max, detail
forvalues h = 1/24 {
	sum temp_`h', detail
	}

corr temp_avg temp_1-temp_24
matrix define temp_corr = r(C)
matrix define X = temp_corr[1..25, 1]


********************************************************************************
** Figure 1: Mean consumption by temperature and temperature histogram

use cons_data.dta, clear
keep if electric_heat == 0
keep if year_built >= 1975 & year_built <= 1982
gen temp_bin = round(temp_avg)
keep kwh_6 kwh_18 kwh_daily temp_bin
save Temp/Consumption_by_Temperature_Summary.dta, replace

foreach i in daily 6 18 {

	use Temp/Consumption_by_Temperature_Summary.dta, clear
	drop if temp_bin < 40 | temp_bin > 80
	collapse (mean) kwh_`i' (p25) kwh_`i'_p25 = kwh_`i' (p75) kwh_`i'_p75 = kwh_`i', by(temp_bin)
	mkmat kwh_`i' kwh_`i'_p25 kwh_`i'_p75
	
}

drop temp_bin

** Below are the mean, 25th, and 75th percentiles of daily consumption,
** 6am consumption, and 6pm consumption within discrete temperature bins ranging
** from 40 to 80 deg. F

svmat kwh_daily_p25
svmat kwh_daily
svmat kwh_daily_p75
svmat kwh_6_p25
svmat kwh_6
svmat kwh_6_p75
svmat kwh_18_p25
svmat kwh_18
svmat kwh_18_p75
gen temp_bin = _n + 39


** Create histogram of the daily average temperature (top right panel of Fig 1)

use cons_data.dta, clear
collapse (mean) temp_avg, by(year month day)
gen temp_bin = round(temp_avg)
gen count = 1
collapse (count) count, by(temp_bin)


********************************************************************************
** Figure A5: Solar exposure by mean year of construction

use premise_characteristics.dta, clear
collapse (mean) mean_year = year_built, by(szip)

** Note: Source for solar data by zipcode  https://www.google.com/get/sunroof#p=0

gen solar_viable = .
replace solar_viable = 87 if szip == 95608
replace solar_viable = 86 if szip == 95610
replace solar_viable = . if szip == 95615
replace solar_viable = 91 if szip == 95621
replace solar_viable = 95 if szip == 95624
replace solar_viable = 87 if szip == 95626
replace solar_viable = 85 if szip == 95628
replace solar_viable = 93 if szip == 95630
replace solar_viable = . if szip == 95632
replace solar_viable = . if szip == 95638
replace solar_viable = . if szip == 95639
replace solar_viable = 98 if szip == 95655
replace solar_viable = 91 if szip == 95660
replace solar_viable = 96 if szip == 95661
replace solar_viable = 82 if szip == 95662
replace solar_viable = 93 if szip == 95670
replace solar_viable = 83 if szip == 95673
replace solar_viable = . if szip == 95678
replace solar_viable = . if szip == 95683
replace solar_viable = . if szip == 95690
replace solar_viable = . if szip == 95693
replace solar_viable = 96 if szip == 95742
replace solar_viable = 99 if szip == 95757
replace solar_viable = 97 if szip == 95758
replace solar_viable = 73 if szip == 95811
replace solar_viable = 72 if szip == 95814
replace solar_viable = 76 if szip == 95815
replace solar_viable = 62 if szip == 95816
replace solar_viable = 66 if szip == 95817
replace solar_viable = 58 if szip == 95818
replace solar_viable = 70 if szip == 95819
replace solar_viable = 78 if szip == 95820
replace solar_viable = 86 if szip == 95821
replace solar_viable = 89 if szip == 95822
replace solar_viable = 95 if szip == 95823
replace solar_viable = 88 if szip == 95824
replace solar_viable = 81 if szip == 95825
replace solar_viable = 93 if szip == 95826
replace solar_viable = 94 if szip == 95827
replace solar_viable = 96 if szip == 95828
replace solar_viable = 95 if szip == 95829
replace solar_viable = 85 if szip == 95830
replace solar_viable = 92 if szip == 95831
replace solar_viable = 97 if szip == 95832
replace solar_viable = 87 if szip == 95833
replace solar_viable = 93 if szip == 95834
replace solar_viable = 97 if szip == 95835
replace solar_viable = 68 if szip == 95837
replace solar_viable = 86 if szip == 95838
replace solar_viable = 88 if szip == 95841
replace solar_viable = 94 if szip == 95842
replace solar_viable = 95 if szip == 95843
replace solar_viable = 87 if szip == 95864

scatter solar_viable mean_year
